* Prepare data for PyTwoWay package run (firmtwfe03) 
cap noisily program drop firmmoverbias 
cap noisily program drop prep_pytwoway 
cap noisily program drop twfe_baseline 
cap noisily program drop card_vardecomp

program firmmoverbias
	prep_pytwoway
	twfe_baseline
	card_vardecomp
end

program prep_pytwoway
	
	* Load data and restrict to firm twfe-sample
	use "${intermediate_data}/twfe/physicians_movers_EINsize15.dta", clear
	keep if mover_type == "Single Movers" & inrange(age, 40,55) // keep single movers aged 40-55

	* Generate firm id
	ren ein_adj j // "firm id" 
		
	* Ren and label variables
	gen y =  logptotinc
	ren id i // worker id
	ren year t
	
	* Restrict to relevant varibles
	keep y i j t 
		
	compress
	save "${intermediate_data}/twfe/physicians_movers_EINsize15_biascorrection.dta", replace

end

* Run manual regressions without controls to use for 
* comparison with the python package output

program twfe_baseline

	* Load data and restrict to twfe-sample
	use "${intermediate_data}/twfe/physicians_movers_EINsize15.dta", clear
	xtset id year 
	
	* Estimation 
	reghdfe logptotinc, a(ein_adj id, savefe) resid
	local N_UR = e(N)
	keep if e(sample)
			
	keep logptotinc ein_adj id year __hdfe* _reghdfe_resid
	rename (__hdfe1__ __hdfe2__) (__hdfeFIRM__ __hdfeINDIVID__)
	gen N_UR = `N_UR'
	save "${temp}/IndividFixedEffects_EINsize15.dta", replace

	gcollapse logptotinc __hdfe*__ N_UR, by(ein_adj)
	save "${temp}/FirmFixedEffects_EINsize15.dta", replace
		
end

* Variance decomposition

program card_vardecomp
	use "${temp}/IndividFixedEffects_EINsize15.dta", clear
	
	local N_UR = `=N_UR[1]'
		
	rename (__hdfeFIRM__ __hdfeINDIVID__) (__hdfe1__ __hdfe2__) 
	
	preserve
		foreach y in 1 2 {
			cap confirm variable __hdfe`y'__
			if !_rc {
				qui sum __hdfe`y'__
				gen sd_hdfe`y' = `r(sd)'
				gen CoVar_hdfe`y' = `r(Var)'

				foreach z in 1 2 {
					cap confirm variable __hdfe`z'__
					if !_rc & `y' < `z' {
						qui corr __hdfe`y'__ __hdfe`z'__
						gen  Corr_hdfe`y'`z' = r(C)[1,2]

						qui corr __hdfe`y'__ __hdfe`z'__, cov
						gen CoVar_hdfe`y'`z' = `r(cov_12)'
					}
				}
			}
		}

		* Variance of the residual
		qui sum _reghdfe_resid 
		gen CoVar_resid = `r(Var)'

		gcollapse (sd) sd_logptotinc = logptotinc (first) sd_* Corr_* CoVar_*

		gen CoVar_logptotinc = sd_logptotinc * sd_logptotinc

		gen CoVar_recomposed =	CoVar_hdfe1 + CoVar_hdfe2 + 2*CoVar_hdfe12 + CoVar_resid
		
		gen sd_recomposed = sqrt(CoVar_recomposed)
		gen spec = "Person-Year Level"
		gen sample = "firm-twfe (physicians)"
		save "${temp}/personyear_physicians_EINsize15_nocontrols.dta", replace
	restore

	gcollapse logptotinc __hdfe* _reghdfe_resid, by(ein_adj)

	foreach y in 1 2 {
		cap confirm variable __hdfe`y'__
		if !_rc {
			qui sum __hdfe`y'__, d
			gen sd_hdfe`y' = `r(sd)'
			gen CoVar_hdfe`y' = `r(Var)'

			foreach z in 1 2 {
				cap confirm variable __hdfe`z'__
				if !_rc & `y' < `z' {
					qui corr __hdfe`y'__ __hdfe`z'__
					gen  Corr_hdfe`y'`z' = r(C)[1,2]

					qui corr __hdfe`y'__ __hdfe`z'__, cov
					gen CoVar_hdfe`y'`z' = `r(cov_12)'
				}
			}
		}
	}

	* Variance of the residual
	qui sum _reghdfe_resid 
	gen CoVar_resid = `r(Var)'

	gcollapse (sd) sd_logptotinc = logptotinc (first) sd_* Corr_* CoVar_*

	gen CoVar_logptotinc = sd_logptotinc * sd_logptotinc

	gen CoVar_recomposed =	CoVar_hdfe1 + CoVar_hdfe2 + 2*CoVar_hdfe12 + CoVar_resid

	gen sd_recomposed = sqrt(CoVar_recomposed)
	
	gen sample = "firm-twfe (physicians)"
	gen spec = "Firm Level"
	append using "${temp}/personyear_physicians_EINsize15_nocontrols.dta"

	reshape long sd_ CoVar_ Corr_, i(spec) j(effect) string
	
	replace effect = subinstr(effect, "12", "Firm/Individ", .)
	replace effect = subinstr(effect, "1", "Firm", .)
	replace effect = subinstr(effect, "2", "Individ", .)
	replace effect = subinstr(effect, "hdfe", "", .)
		
	gen id =.
	replace id = 1 if effect == "logptotinc"
	replace id = 2 if effect == "recomposed"
	replace id = 3 if effect == "Firm"
	replace id = 4 if effect == "Individ"
	replace id = 5 if effect == "Firm/Individ"
		
	bys spec: gen temp = CoVar_ if effect == "recomposed"
	egen temp2 = max(temp), by(spec)
	by spec: gen Var_share = CoVar_ / temp2
	
	gen N_UR = `N_UR'
	
	order sample spec, first 
	gsort sample -spec id
	drop id temp temp2
	order N_UR, last
	save "$output/twfe/firm_twfe/earnings_decomp_card_nocontrols_physicians_EINsize15.dta", replace
	
end

firmmoverbias 

